1. 读取输入数据 | Load Input Data
# Set the working directory to the folder that holds the input files if needed.
# setwd("your_data_path")

# Read the four input tables (drug annotation, gene annotation, hypoxia
# associations, practice data), keeping text columns as character vectors.
read_input <- function(path) {
  read.csv(path, stringsAsFactors = FALSE)
}
drug_ann <- read_input("easy_input_drugAnn.csv")
gene_ann <- read_input("easy_input_geneAnn.csv")
hypoxia_data <- read_input("easy_input_hypoxia.csv")
practice_data <- read_input("easy_input_practice.csv")
# 查看数据结构 | View data structure
cat("=== 药物注释数据 | Drug Annotation ===\n")
## === 药物注释数据 | Drug Annotation ===
head(drug_ann)
## drug therapy n_sensitive n_resistant signaling
## 1 Drug 1 Immunotherapy 2 1 Signaling 1
## 2 Drug 2 Immunotherapy 2 1 Signaling 2
## 3 Drug 3 Immunotherapy 3 3 Signaling 3
## 4 Drug 4 Immunotherapy 4 2 Signaling 3
## 5 Drug 5 Immunotherapy NA 3 Signaling 4
## 6 Drug 6 Chemotherapy 3 3 Signaling 5
cat("\n=== 基因注释数据 | Gene Annotation ===\n")
##
## === 基因注释数据 | Gene Annotation ===
head(gene_ann)
## gene drug
## 1 Gene 1 Drug 1
## 2 Gene 2 Drug 2
## 3 Gene 2 Drug 3
## 4 Gene 3 Drug 3
## 5 Gene 4 Drug 4
## 6 Gene 5 Drug 4
cat("\n=== 低氧数据 | Hypoxia Data ===\n")
##
## === 低氧数据 | Hypoxia Data ===
head(hypoxia_data)
## cancer gene direction omics
## 1 Cancer10 Gene26 high Mutation
## 2 Cancer 5 Gene23 low Methylation
## 3 Cancer 3 Gene25 low Mutation
## 4 Cancer 3 Gene27 low Mutation
## 5 Cancer 5 Gene21 high Mutation
## 6 Cancer 4 Gene16 low Protein
cat("\n=== 实践数据 | Practice Data ===\n")
##
## === 实践数据 | Practice Data ===
head(practice_data)
## x
## 1 Gene21-Cancer 5
## 2 Gene13-Cancer 9
## 3 Gene 2-Cancer 9
## 4 Gene 9-Cancer 4
## 5 Gene16-Cancer 5
## 6 Gene 5-Cancer 9
# Data cleaning: strip leading/trailing whitespace from the identifier columns.
# str_trim() only touches both ends, so padded names such as "Gene 1" keep
# their inner space.
hypoxia_keys <- c("gene", "cancer")
hypoxia_data[hypoxia_keys] <- lapply(hypoxia_data[hypoxia_keys], str_trim)
gene_ann[["gene"]] <- str_trim(gene_ann[["gene"]])
drug_ann[["drug"]] <- str_trim(drug_ann[["drug"]])
2. 左侧图:基因-癌症关联矩阵 | Left Panel: Gene-Cancer Association Matrix
# Prepare left-panel data: count the association rows per gene in hypoxia_data
# and order the genes from most to fewest associations.
left_panel_data <- hypoxia_data %>%
  group_by(gene) %>%
  tally(name = "n_assoc") %>%
  arrange(desc(n_assoc))

# Sorted gene vector, reused below so the panels share one gene order.
gene_order <- left_panel_data[["gene"]]
cat("左侧面板基因顺序 | Gene order in left panel:\n")
## 左侧面板基因顺序 | Gene order in left panel:
print(gene_order)
## [1] "Gene21" "Gene23" "Gene28" "Gene 6" "Gene16" "Gene 1" "Gene 7" "Gene 9"
## [9] "Gene20" "Gene 2" "Gene 8" "Gene12" "Gene17" "Gene18" "Gene19" "Gene22"
## [17] "Gene24" "Gene25" "Gene26" "Gene 3" "Gene 4" "Gene10" "Gene13" "Gene14"
## [25] "Gene27" "Gene29" "Gene 5" "Gene15" "Gene30" "Gene11"
# Point shape (pch code) for each omics type.
omics_shape_map <- c(
  "mRNA"        = 15, # square
  "Protein"     = 16, # circle
  "Methylation" = 17, # triangle
  "Mutation"    = 18, # diamond
  "CNV"         = 8   # asterisk
)

# Unique cancer types in sorted order; used as the x-axis factor levels.
cancer_order <- sort(unique(hypoxia_data$cancer))

# Plotting data for the left panel. `shape` and `color` are helper columns;
# the plot below maps `omics` and `direction` directly. Rows are sorted by the
# gene factor so they follow gene_order.
plot_data_left <- hypoxia_data %>%
  mutate(
    shape = omics_shape_map[omics],
    color = ifelse(direction == "high", "red", "blue"),
    gene = factor(gene, levels = gene_order),
    cancer = factor(cancer, levels = cancer_order)
  ) %>%
  arrange(gene)
# Left panel: one point per gene-cancer association. Shape encodes the omics
# type and colour encodes the direction (high / low).
direction_colors <- c("high" = "#E41A1C", "low" = "#377EB8")

left_theme <- theme_minimal() +
  theme(
    axis.text.x = element_text(
      angle = 45, hjust = 1, size = 10, face = "bold"
    ),
    axis.text.y = element_text(size = 8),
    axis.title = element_blank(),
    panel.grid.major = element_line(color = "gray95", linewidth = 0.2),
    panel.grid.minor = element_blank(),
    legend.position = "right",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 9),
    plot.title = element_text(
      hjust = 0, size = 12, face = "bold", margin = margin(b = 10)
    )
  )

p_left <- ggplot(plot_data_left, aes(x = cancer, y = gene)) +
  geom_point(
    aes(shape = omics, color = direction),
    size = 4, alpha = 0.9, stroke = 1.2
  ) +
  scale_shape_manual(
    name = "omics",
    values = omics_shape_map,
    breaks = names(omics_shape_map)
  ) +
  scale_color_manual(
    name = "direction",
    values = direction_colors,
    breaks = names(direction_colors)
  ) +
  labs(title = "A. Gene-Cancer Association Matrix") +
  left_theme
print(p_left)

3. 中间图:基因-药物连接线(保持左侧基因顺序) | Middle Panel: Gene-Drug Connection Lines (Consistent Gene Order)
# Prepare middle-panel data: gene-drug associations.
# semi_join() keeps only the gene_ann rows whose gene also occurs in
# hypoxia_data. The previous left_join() + filter(!is.na(gene)) did not filter
# anything, because a left join keeps every row of gene_ann; genes missing from
# gene_order then received an NA y-position further down.
middle_data <- gene_ann %>%
  semi_join(hypoxia_data, by = "gene") %>%
  # Factor levels follow gene_order, so sorting matches the left panel.
  mutate(gene = factor(gene, levels = gene_order)) %>%
  arrange(gene)

# Drug y-coordinates: drugs are stacked in order of first appearance.
drug_list <- unique(middle_data$drug)
drug_pos <- data.frame(
  drug = drug_list,
  drug_y = seq_along(drug_list),
  stringsAsFactors = FALSE
)

# Gene y-coordinates: position of each gene in the left-panel order.
gene_y_map <- data.frame(
  gene = gene_order,
  gene_y = seq_along(gene_order),
  stringsAsFactors = FALSE
)

# One row per gene-drug link, with both endpoints' y-coordinates, the drug
# annotation, and a line colour derived from the signaling pathway.
middle_plot_data <- middle_data %>%
  left_join(drug_pos, by = "drug") %>%
  left_join(
    drug_ann %>% select(drug, therapy, signaling),
    by = "drug"
  ) %>%
  mutate(
    line_color = case_when(
      signaling %in% c("Signaling 1", "Signaling 2") ~ "#9370DB", # purple
      signaling %in% c(
        "Signaling 3", "Signaling 4", "Signaling 5", "Signaling 6"
      ) ~ "#FF4444", # red
      signaling == "Signaling 7" ~ "#4169E1", # blue
      signaling %in% c(
        "Signaling 8", "Signaling 9", "Signaling10"
      ) ~ "#FFA500", # orange
      TRUE ~ "#CCCCCC" # gray fallback for unlisted or missing pathways
    ),
    # Factor level index; duplicates gene_y and is kept only so the column set
    # stays the same as before.
    gene_numeric = as.numeric(gene),
    # Use a plain character key so the join below matches gene_y_map$gene
    # without an implicit factor-to-character coercion.
    gene = as.character(gene)
  ) %>%
  left_join(gene_y_map, by = "gene")
cat("中间面板数据摘要 | Middle panel data summary:\n")
## 中间面板数据摘要 | Middle panel data summary:
cat("连接数量 | Number of connections:", nrow(middle_plot_data), "\n")
## 连接数量 | Number of connections: 36
cat("基因数量 | Number of genes:", n_distinct(middle_plot_data$gene), "\n")
## 基因数量 | Number of genes: 30
cat("药物数量 | Number of drugs:", n_distinct(middle_plot_data$drug), "\n\n")
## 药物数量 | Number of drugs: 20
# Middle panel: genes in a column at x = 0, drugs in a column at x = 1, and one
# segment per gene-drug link coloured by signaling pathway.
#
# The y-range must cover BOTH columns. It used to stop at the number of drugs,
# so whenever there were more genes than drugs the extra gene points, labels
# and their segments fell outside the scale and were dropped with
# "Removed N rows ..." warnings.
middle_y_max <- max(gene_y_map$gene_y, drug_pos$drug_y)

p_middle <- ggplot(middle_plot_data) +
  # Connection lines; line_color already holds literal colours
  # (see scale_color_identity() below).
  geom_segment(
    aes(x = 0, y = gene_y, xend = 1, yend = drug_y, color = line_color),
    alpha = 0.35,
    linewidth = 0.5
  ) +
  # Gene points
  geom_point(
    data = gene_y_map,
    aes(x = 0, y = gene_y),
    size = 2.5,
    color = "steelblue",
    alpha = 0.8
  ) +
  # Drug points
  geom_point(
    data = drug_pos,
    aes(x = 1, y = drug_y),
    size = 2.5,
    color = "darkred",
    alpha = 0.8
  ) +
  # Gene labels, right-aligned just left of the gene column
  geom_text(
    data = gene_y_map,
    aes(x = -0.08, y = gene_y, label = gene),
    hjust = 1,
    size = 2.5,
    color = "steelblue",
    fontface = "bold"
  ) +
  # Drug labels, left-aligned just right of the drug column
  geom_text(
    data = drug_pos,
    aes(x = 1.08, y = drug_y, label = drug),
    hjust = 0,
    size = 2.5,
    color = "darkred",
    fontface = "bold"
  ) +
  scale_color_identity() +
  xlim(-0.25, 1.25) +
  ylim(0.5, middle_y_max + 0.5) +
  theme_void() +
  theme(
    plot.title = element_text(
      hjust = 0.5, size = 12, face = "bold", margin = margin(b = 10)
    )
  ) +
  labs(title = "B. Gene-Drug-Pathway Links")
print(p_middle)
## Warning: Removed 11 rows containing missing values or values outside the scale
## range (`geom_segment()`).
## Warning: Removed 10 rows containing missing values or values outside the scale
## range (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale
## range (`geom_text()`).

4. 右侧图:药物响应条形图 | Right Panel: Drug Response Bar Chart
# Prepare right-panel data: response counts for the drugs shown in the middle
# panel, plus colour columns derived from therapy and signaling.

# Display order of the therapy classes.
therapy_levels <- c(
  "Chemotherapy", "Hormone therapy", "Immunotherapy", "Targeted therapy"
)

# Colour per therapy class.
therapy_palette <- c(
  "Chemotherapy"     = "#E41A1C", # red
  "Hormone therapy"  = "#377EB8", # blue
  "Immunotherapy"    = "#4DAF4A", # green
  "Targeted therapy" = "#FF7F00"  # orange
)

# Light background colour per signaling pathway.
signaling_bg_palette <- c(
  "Signaling 1" = "#E6D7F0", "Signaling 2" = "#E6D7F0", # light purple
  "Signaling 3" = "#FFE6E6", "Signaling 4" = "#FFE6E6", # light red
  "Signaling 5" = "#FFE6E6", "Signaling 6" = "#FFE6E6",
  "Signaling 7" = "#E6F0FF",                            # light blue
  "Signaling 8" = "#FFF0E6", "Signaling 9" = "#FFF0E6", # light orange
  "Signaling10" = "#FFF0E6"
)

# Look each key up in a named palette; unlisted or missing keys get `fallback`.
lookup_color <- function(keys, palette, fallback) {
  found <- unname(palette[as.character(keys)])
  found[is.na(found)] <- fallback
  found
}

right_panel_data <- drug_ann %>%
  filter(drug %in% drug_list) %>%
  select(drug, therapy, signaling, n_sensitive, n_resistant) %>%
  mutate(
    # Missing counts are treated as zero.
    n_sensitive = ifelse(is.na(n_sensitive), 0, n_sensitive),
    n_resistant = ifelse(is.na(n_resistant), 0, n_resistant),
    therapy_color = lookup_color(therapy, therapy_palette, "#999999"),
    signaling_bg = lookup_color(signaling, signaling_bg_palette, "#F5F5F5")
  ) %>%
  # Sort by therapy class, then by signaling pathway.
  arrange(factor(therapy, levels = therapy_levels), signaling) %>%
  mutate(
    # Freeze the sorted row order as the drug factor levels.
    drug = factor(drug, levels = drug),
    signaling_order = as.numeric(factor(signaling, levels = unique(signaling)))
  )

# Long format: one row per drug and response type (sensitive / resistant).
response_levels <- c("n_sensitive", "n_resistant")
right_plot_data <- right_panel_data %>%
  pivot_longer(
    cols = c(n_sensitive, n_resistant),
    names_to = "response_type",
    values_to = "count"
  ) %>%
  mutate(
    response_type = factor(response_type, levels = response_levels),
    # Green for sensitive, magenta for resistant.
    response_color = ifelse(
      response_type == "n_sensitive", "#2ECC71", "#E91E63"
    )
  )
# Right panel: horizontal bars of sensitive / resistant counts per drug, with
# one facet per therapy class.
#
# Changes from the earlier version:
# * axis.text.y no longer receives a per-row colour vector. element_text()
#   does not officially support vectors, right_plot_data has two rows per
#   drug, and with free-y facets every panel would reuse the vector from its
#   start, so labels were not coloured by their own therapy. The therapy class
#   is already shown by the facet strips.
# * Only the y-axis title is blanked, so the "# Cancers" x label set in labs()
#   is actually drawn.
p_right <- ggplot(
  right_plot_data,
  aes(x = count, y = drug, fill = response_color)
) +
  # NOTE(review): position = "identity" draws both response bars from zero on
  # top of each other, so the shorter bar can be hidden; confirm whether
  # "stack" or "dodge" was intended.
  geom_bar(
    stat = "identity", position = "identity", alpha = 0.85, width = 0.7
  ) +
  # One facet per therapy class, in a fixed order; each facet lists only its
  # own drugs (free y scale).
  facet_wrap(
    ~factor(therapy, levels = c("Chemotherapy", "Hormone therapy",
                                "Immunotherapy", "Targeted therapy")),
    scales = "free_y",
    ncol = 1
  ) +
  # fill already holds literal colours; the legend maps them back to names.
  scale_fill_identity(
    name = "cat",
    breaks = c("#2ECC71", "#E91E63"),
    labels = c("#2ECC71" = "n_sensitive", "#E91E63" = "n_resistant"),
    guide = guide_legend(override.aes = list(alpha = 0.85))
  ) +
  scale_x_continuous(
    limits = c(0, 12),
    breaks = seq(0, 12, 4),
    labels = c("0", "4", "8", "12")
  ) +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 8),
    axis.text.x = element_text(size = 9),
    axis.title.x = element_text(size = 10),
    axis.title.y = element_blank(),
    strip.text = element_text(size = 10, face = "bold", margin = margin(b = 5)),
    strip.background = element_rect(fill = "gray95", color = "gray80"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "right",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 9),
    plot.title = element_text(
      hjust = 0, size = 12, face = "bold", margin = margin(b = 10)
    )
  ) +
  labs(title = "C. Drug Response Profile", x = "# Cancers")
print(p_right)

5. 组合完整图形 | Combine All Panels
# Combine the three panels side by side with patchwork, collecting the legends
# and adding a shared, centred title.
panel_widths <- c(2.5, 1.5, 3)
figure_title <- "多层次基因-药物-通路关联分析 | Multi-level Gene-Drug-Pathway Association Analysis"
figure_title_theme <- theme(
  plot.title = element_text(
    hjust = 0.5, size = 14, face = "bold", margin = margin(b = 15)
  )
)

combined_plot <- (p_left | p_middle | p_right) +
  plot_layout(widths = panel_widths, guides = "collect") +
  plot_annotation(title = figure_title, theme = figure_title_theme)
print(combined_plot)
## Warning: Removed 11 rows containing missing values or values outside the scale
## range (`geom_segment()`).
## Warning: Removed 10 rows containing missing values or values outside the scale
## range (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale
## range (`geom_text()`).

# Save the combined figure as a PDF.
# The default pdf() device cannot encode the Chinese characters in the figure
# title (it emits "conversion failure ... in 'mbcsToSbcs'" warnings), so use
# the Cairo-based PDF device when this R build provides it.
# NOTE(review): the glyphs still need a font with CJK coverage on the system.
pdf_device <- if (capabilities("cairo")) cairo_pdf else "pdf"
ggsave(
  filename = "gene_drug_pathway_analysis.pdf",
  plot = combined_plot,
  device = pdf_device,
  width = 20,
  height = 14,
  dpi = 300,
  units = "in"
)
## Warning: Removed 11 rows containing missing values or values outside the scale
## range (`geom_segment()`).
## Warning: Removed 10 rows containing missing values or values outside the scale
## range (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale
## range (`geom_text()`).
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 多 (U+591A)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 层 (U+5C42)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 次 (U+6B21)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 基 (U+57FA)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 因 (U+56E0)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 药 (U+836F)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 物 (U+7269)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 通 (U+901A)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 路 (U+8DEF)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 关 (U+5173)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 联 (U+8054)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 分 (U+5206)
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## conversion failure on '多层次基因-药物-通路关联分析 | Multi-level
## Gene-Drug-Pathway Association Analysis' in 'mbcsToSbcs': for 析 (U+6790)
cat("\n✓ 图形已保存为 gene_drug_pathway_analysis.pdf\n")
##
## ✓ 图形已保存为 gene_drug_pathway_analysis.pdf
# Save the same figure as a 20 x 14 inch PNG at 300 dpi.
png_path <- "gene_drug_pathway_analysis.png"
ggsave(
  png_path,
  combined_plot,
  units = "in",
  width = 20,
  height = 14,
  dpi = 300
)
## Warning: Removed 11 rows containing missing values or values outside the scale
## range (`geom_segment()`).
## Warning: Removed 10 rows containing missing values or values outside the scale
## range (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale
## range (`geom_text()`).
cat("✓ 图形已保存为 gene_drug_pathway_analysis.png\n")
## ✓ 图形已保存为 gene_drug_pathway_analysis.png
6. 数据统计汇总 | Data Summary Statistics
# Print a blank line, a 60-character rule, the section title, and the rule
# again. `title` must carry its own trailing newline.
print_section_banner <- function(title) {
  rule <- strrep("=", 60)
  cat("\n")
  cat(rule, "\n")
  cat(title)
  cat(rule, "\n")
  invisible(NULL)
}

# Gene-cancer association statistics (hypoxia_data).
print_section_banner("基因-癌症关联统计 | Gene-Cancer Association Statistics\n")
## ============================================================
## 基因-癌症关联统计 | Gene-Cancer Association Statistics
## ============================================================
cat("总关联数 | Total associations:", nrow(hypoxia_data), "\n")
## 总关联数 | Total associations: 151
cat("独特基因数 | Number of unique genes:", n_distinct(hypoxia_data$gene), "\n")
## 独特基因数 | Number of unique genes: 30
cat("独特癌症类型数 | Number of unique cancers:", n_distinct(hypoxia_data$cancer), "\n")
## 独特癌症类型数 | Number of unique cancers: 10
cat("组学数据类型分布 | Omics type distribution:\n")
## 组学数据类型分布 | Omics type distribution:
print(table(hypoxia_data$omics))
##
## CNV Methylation mRNA Mutation Protein
## 27 23 37 35 29
cat("\n表达方向分布 | Expression direction distribution:\n")
##
## 表达方向分布 | Expression direction distribution:
print(table(hypoxia_data$direction))
##
## high low
## 71 80

# Gene-drug-pathway association statistics (middle_plot_data).
print_section_banner("基因-药物-通路关联统计 | Gene-Drug-Pathway Association Statistics\n")
## ============================================================
## 基因-药物-通路关联统计 | Gene-Drug-Pathway Association Statistics
## ============================================================
cat("总关联数 | Total associations:", nrow(middle_plot_data), "\n")
## 总关联数 | Total associations: 36
cat("涉及基因数 | Number of genes:", n_distinct(middle_plot_data$gene), "\n")
## 涉及基因数 | Number of genes: 30
cat("涉及药物数 | Number of drugs:", n_distinct(middle_plot_data$drug), "\n")
## 涉及药物数 | Number of drugs: 20
cat("涉及通路数 | Number of pathways:", n_distinct(middle_plot_data$signaling), "\n")
## 涉及通路数 | Number of pathways: 10

# Number of drugs per therapy type (all of drug_ann).
print_section_banner("治疗方式分布 | Therapy Type Distribution\n")
## ============================================================
## 治疗方式分布 | Therapy Type Distribution
## ============================================================
print(table(drug_ann$therapy))
##
## Chemotherapy Hormone therapy Immunotherapy Targeted therapy
## 5 5 5 5

# Number of drugs per signaling pathway (all of drug_ann).
print_section_banner("通路分布 | Signaling Pathway Distribution\n")
## ============================================================
## 通路分布 | Signaling Pathway Distribution
## ============================================================
print(table(drug_ann$signaling))
##
## Signaling 1 Signaling 2 Signaling 3 Signaling 4 Signaling 5 Signaling 6
## 1 1 2 1 2 3
## Signaling 7 Signaling 8 Signaling 9 Signaling10
## 5 1 2 2